pollution_gather_aqi %>% 
  group_by(pollutants,year) %>%
  summarize(mean_aqi = mean(aqi, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = mean_aqi, color = pollutants)) + 
    geom_point() + geom_line() + 
    theme(legend.position = "bottom")

From the point and line plot given above, we can see that overall the

pollution_aqi %>% 
  mutate(year = fct_reorder(year, no2_aqi)) %>% 
  plot_ly(y = ~no2_aqi, color = ~year, type = "box",
          colors = "Set2")
pollution_aqi %>% 
  mutate(year = fct_reorder(year, o3_aqi)) %>% 
  plot_ly(y = ~o3_aqi, color = ~year, type = "box",
          colors = "Set2")
pollution_aqi %>% 
  mutate(year = fct_reorder(year, so2_aqi)) %>% 
  plot_ly(y = ~so2_aqi, color = ~year, type = "box",
          colors = "Set2")
pollution_aqi %>% 
  mutate(year = fct_reorder(year, co_aqi)) %>% 
  plot_ly(y = ~co_aqi, color = ~year, type = "box",
          colors = "Set2")
## Warning: Ignoring 45 observations

From the box plots given above, we can further categorize our data into different level of aqi given each pollutants We have searched the cut off point for each pollutants

pollution_aqi_cat <- pollution_aqi %>% 
  mutate(., o3_aqi_cat = ifelse(o3_aqi %in% 0:54, "good", ifelse(o3_aqi %in% 55:70, "moderate", ifelse(o3_aqi %in% 71:85, "unhealthy for senstitive", ifelse(o3_aqi %in% 86:105, "unhealth", "very unhealthy")))),
         co_aqi_cat = ifelse(co_aqi %in% 0:4.4, "good", ifelse(co_aqi %in% 4.5:9.4, "moderate", ifelse(co_aqi %in% 9.5:12.4, "unhealthy for senstitive", ifelse(co_aqi %in% 12.5:15.4, "unhealth", ifelse(co_aqi %in% 15.5:30.4, "very unhealth", "hazardous"))))),
         so2_aqi_cat = ifelse(so2_aqi %in% 0:35, "good", ifelse(so2_aqi %in% 36:75, "moderate", ifelse(so2_aqi %in% 76:185, "unhealthy for senstitive", ifelse(so2_aqi %in% 186:304, "unhealth", ifelse(so2_aqi %in% 305:604, "very unhealth", "hazardous"))))),
         no2_aqi_cat = ifelse(no2_aqi %in% 0:53, "good", ifelse(no2_aqi %in% 54:100, "moderate", ifelse(no2_aqi %in% 101:360, "unhealthy for senstitive", ifelse(no2_aqi %in% 361:649, "unhealth", ifelse(no2_aqi %in% 650:1249, "very unhealth", "hazardous"))))),
         ) %>% 
  select(-no2_aqi, -co_aqi, -so2_aqi, -o3_aqi)
pollution_aqi_cat <- pollution_aqi_cat %>% 
  gather(key = pollutants, value = aqi, o3_aqi_cat:no2_aqi_cat)

pollution_aqi_cat %>% 
  group_by(year, aqi) %>% 
  summarize(n = n()) %>% 
  spread(key = aqi, value = n) 
## # A tibble: 7 x 8
## # Groups:   year [7]
##    year  good hazardous moderate unhealth `unhealthy for senstitive`
## * <chr> <int>     <int>    <int>    <int>                      <int>
## 1    10 36967     16180     2513      380                        650
## 2    11 47392     23246     4470      451                        954
## 3    12 55143     21028     2347      399                        620
## 4    13 64538     15412     2070      320                        623
## 5    14 72417      6585     1065      182                        438
## 6    15 68940      7083     1070      257                        487
## 7    16 14701      1570      119        7                         29
## # ... with 2 more variables: `very unhealth` <int>, `very unhealthy` <int>
#overall aqi bar plot
pollution_aqi_cat %>% 
  mutate(year = fct_infreq(year)) %>%
  ggplot(aes(x = year, fill = aqi)) + geom_bar()

#o3_aqi bar plot
pollution_aqi_cat %>% 
  filter(pollutants == "o3_aqi_cat") %>% 
  ggplot(aes(x = year, fill = aqi)) + geom_bar()

#co
pollution_aqi_cat %>% 
  filter(pollutants == "co_aqi_cat") %>% 
  ggplot(aes(x = year, fill = aqi)) + geom_bar()

#no2
pollution_aqi_cat %>% 
  filter(pollutants == "no2_aqi_cat") %>% 
  ggplot(aes(x = year, fill = aqi)) + geom_bar()

#so2
pollution_aqi_cat %>% 
  filter(pollutants == "so2_aqi_cat") %>% 
  ggplot(aes(x = year, fill = aqi)) + geom_bar()